#-------------------------------------------------------#
#   Quality Assessment of the Academic Freedom Index    #
#-------------------------------------------------------#

# Title: "Quality Assessment of the Academic Freedom Index: Strengths, Weaknesses, and How Best to Use It" 
# Authors: "Lott, Lars", "Spannagel, Janika"
# date: 2024-09-19
# journal: Perspectives on Politics
# DOI: TBA
# written under "R and RStudio (4.4.1)"


#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages
library(here)
library(tidyverse)
library(ggpubr)
library(countrycode)
library(ggrepel)
library(ggthemes)
library(lubridate)
library(dotwhisker)
library(estimatr)
library(texreg)
library(readxl)
library(haven)
library(scales)
library(ggeffects)
library(viridis)
library(marginaleffects)

#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

#load coder characteristics

coder_characteristics_wide <- readRDS("data/coder_characteristics_wide_simulated_data.rds")

# load country_aggregated data

vdem_full_v13 <- readRDS(file.path("data/vdem_v13", "vdem_13_cy", "V-Dem-CY-Full+Others-v13.rds"))

# load coder level ds 

coder_level_ds <- readRDS("data/coder_level_ds_perceptions.rds")


#---------------------------------------------------------------#
#-------------Section G1: Predicting Respondent Disagreement----#
#---------------------------------------------------------------#

list_var <- c("v2cafres", "v2cafexch", "v2cainsaut", "v2casurv", "v2clacfree")

## prepare coder level data ##

summary(coder_level_ds$v2cafres_perceptmean)

ggplot(coder_level_ds, aes(x=v2cafres_perceptmean)) +
  geom_density()

ggplot(coder_level_ds, aes(x=v2cafres_perceptsd)) +
  geom_density()

## prepare coder level data ##

coder_characteristics_wide$gender  <- as.factor(coder_characteristics_wide$gender)
coder_characteristics_wide$government_employment  <- as.factor(coder_characteristics_wide$government_employment)
coder_characteristics_wide$age_block  <- as.factor(coder_characteristics_wide$age_block)
coder_characteristics_wide$bin_reside  <- as.factor(coder_characteristics_wide$bin_reside)


summary(coder_characteristics_wide$gender)
summary(coder_characteristics_wide$government_employment)
summary(coder_characteristics_wide$age_block)
summary(coder_characteristics_wide$bin_reside) 

summary(coder_characteristics_wide)


coder_level_ds <- coder_level_ds %>%
  left_join(coder_characteristics_wide, by = ("coder_id")) %>%
  mutate(reside_in_country = ifelse(country_id == reside, 1, 
                                    ifelse(country_id != reside, 0, NA)))

summary(coder_level_ds$reside_in_country)

temp_i <- coder_level_ds %>%
  mutate(historical_date = ymd(historical_date)) %>%
  group_by(country_id, historical_date) %>%
  dplyr::summarize(sd_v2cafres = sd(v2cafres_perceptmean, na.rm = TRUE), 
            n_v2cafres = sum(!is.na(v2cafres)), 
            sd_v2cafexch = sd(v2cafexch_perceptmean, na.rm = TRUE), 
            n_v2cafexch =  sum(!is.na(v2cafexch)),
            sd_v2cainsaut = sd(v2cainsaut_perceptmean, na.rm = TRUE), 
            n_v2cainsaut =  sum(!is.na(v2cainsaut)),
            sd_v2casurv = sd(v2casurv_perceptmean, na.rm = TRUE), 
            n_v2casurv =  sum(!is.na(v2casurv)),
            sd_v2clacfree = sd(v2clacfree_perceptmean, na.rm = TRUE), 
            n_v2clacfree =  sum(!is.na(v2clacfree)), 
            v2cafres_conf = mean(v2cafres_conf, na.rm = TRUE), 
            v2cafexch_conf = mean(v2cafexch_conf, na.rm = TRUE), 
            v2cainsaut_conf = mean(v2cainsaut_conf, na.rm = TRUE), 
            v2casurv_conf = mean(v2casurv_conf, na.rm = TRUE), 
            v2clacfree_conf = mean(v2clacfree_conf, na.rm = TRUE))

number_of_coders <- coder_level_ds %>%
  mutate(historical_date = ymd(historical_date)) %>%
  group_by(country_id, historical_date) %>%
  dplyr::summarize(number_coders = n_distinct(coder_id), 
                   number_of_extern_coders = sum(reside_in_country, na.rm = T)/number_coders)

temp_i <- temp_i %>%
  left_join(number_of_coders, by = c("country_id", "historical_date"))

vdem_subset_v13 <- vdem_full_v13 %>%
  mutate(historical_date = ymd(historical_date)) %>%
  dplyr::select(country_id, country_name, year, historical_date, v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, v2xca_academ,
                v2x_polyarchy, v2x_liberal, v2x_libdem, e_regionpol_6C,e_gdppc, e_pop, e_cow_exports,e_cow_imports, v2x_freexp) %>%
  left_join(temp_i, by = c("country_id", "historical_date")) %>%
  filter(year >=1900) 

vdem_subset_v13$e_regionpol_6C <- as.factor(vdem_subset_v13$e_regionpol_6C)

## Freedom to research and teach ##
m1 <- lm_robust(sd_v2cafres ~  year + v2x_freexp + v2cafres + v2cafres:v2cafres + n_v2cafres + number_of_extern_coders + v2cafres_conf + e_regionpol_6C,
                clusters = country_id, 
                data = vdem_subset_v13)
summary(m1)

## Freedom of academic exchange and dissemination ##
m2 <-  lm_robust(sd_v2cafexch ~  year + v2x_freexp + v2cafexch + v2cafexch:v2cafexch + n_v2cafexch + number_of_extern_coders + v2cafexch_conf + e_regionpol_6C,
                 clusters = country_id, 
                 data = vdem_subset_v13)
summary(m2)

## Institutional autonomy ##
m3 <-  lm_robust(sd_v2cainsaut ~  year + v2x_freexp + v2cainsaut + v2cainsaut:v2cainsaut + n_v2cainsaut + number_of_extern_coders + v2cainsaut_conf + e_regionpol_6C,
                 clusters = country_id, 
                 data = vdem_subset_v13)
summary(m3)

## Campus integrity ##
m4 <-  lm_robust(sd_v2casurv ~  year + v2x_freexp + v2casurv + v2casurv:v2casurv + n_v2casurv + number_of_extern_coders + v2casurv_conf + e_regionpol_6C,
                 clusters = country_id, 
                 data = vdem_subset_v13)
summary(m4)

## Freedom of academic and cultural expression ##
m5 <-  lm_robust(sd_v2clacfree ~  year + v2x_freexp + v2clacfree + v2clacfree:v2clacfree + n_v2clacfree + number_of_extern_coders + v2clacfree_conf + e_regionpol_6C,
                 clusters = country_id, 
                 data = vdem_subset_v13)
summary(m5)

## Pooled Analysis with all indicators ##

vdem_subset_v13_pooled <- vdem_subset_v13 %>%
  dplyr::select(country_id, historical_date, year, v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, sd_v2cafres, sd_v2cafexch, 
                sd_v2cainsaut, sd_v2casurv, sd_v2clacfree, v2cafres_conf, v2cafexch_conf, v2cainsaut_conf, v2casurv_conf, v2clacfree_conf,
                v2xca_academ, 
                v2x_polyarchy, v2x_liberal, v2x_libdem, e_regionpol_6C,e_gdppc, e_pop, e_cow_exports,e_cow_imports, v2x_freexp, number_coders, number_of_extern_coders) %>%
  pivot_longer(!c(country_id, historical_date, year,  v2x_polyarchy, v2x_liberal, v2x_libdem, v2xca_academ, number_coders,
                  e_regionpol_6C,e_gdppc, e_pop, e_cow_exports,e_cow_imports, v2x_freexp,  v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, number_of_extern_coders, 
                  v2cafres_conf, v2cafexch_conf, v2cainsaut_conf, v2casurv_conf, v2clacfree_conf), names_to = "indicator", values_to = "value_indicator") %>%
  drop_na(value_indicator) %>%
  mutate(v2xca_academ_sqr  = v2xca_academ^2) %>%
  filter(year >=1900) %>%
  mutate(confidence_coders_indidator = case_when(indicator == "sd_v2cafres" ~v2cafres_conf,
                                                 indicator == "sd_v2cafexch" ~v2cafexch_conf,
                                                 indicator == "sd_v2cainsaut" ~v2cainsaut_conf,
                                                 indicator == "sd_v2casurv" ~v2casurv_conf,
                                                 indicator == "sd_v2clacfree" ~v2clacfree_conf))

## pooled regression analysis ##
m6 <-  lm_robust(value_indicator ~ year + v2x_freexp +  v2xca_academ + I(v2xca_academ^2) + number_coders +  
                   number_of_extern_coders + confidence_coders_indidator + e_regionpol_6C + indicator,
                 clusters = country_id, 
                 data = vdem_subset_v13_pooled)
summary(m6)


## Table G1 Supplementary Appendix ##
texreg(list(m1, m2, m3, m4, m5, m6), 
       head.tag = TRUE, body.tag = TRUE,
       file = "outputs_simulated_data/Table_G1.tex",
       digits = 3,
       custom.coef.names = c("Intercept",
                             "Year", 
                             "Freedom of Expression", 
                             "Freedom to research and teach", 
                             "Number of Coders Freedom to research and teach", 
                             "Percentage of Extern Coders", 
                             "Rater's Mean Confidence", 
                             "Latin America and the Caribbean", 
                             "MENA", 
                             "SSA", 
                             "Western Europe and North America", 
                             "Asia and Pacific", 
                             "Freedom of academic exchange and dissemination",
                             "NoC Freedom of academic exchange and dissemination",
                             "Rater's Mean Confidence", 
                             "Institutional autonomy", 
                             "NoC Institutional autonomy", 
                             "Rater's Mean Confidence", 
                             "Campus integrity", 
                             "NoC Campus integrity", 
                             "Rater's Mean Confidence", 
                             "Freedom of academic and cultural expression", 
                             "NoC Freedom of academic and cultural expression",
                             "Rater's Mean Confidence", 
                             "Academic Freedom Index", 
                             "Academic Freedom Index sqr.",
                             "NoC Acadmeic Freedom Index", 
                             "Rater's Mean Confidence", 
                             "Freedom to research and teach",
                             "Institutional autonomy", 
                             "Campus integrity", 
                             "Freedom of academic and cultural expression"),
       booktabs = TRUE,
       use.packages = FALSE,
       caption = "Linear Models predicting respondents disagreement",
       fontsize = "scriptsize",
       stars = c(0.001, 0.01, 0.05), 
       label = "Table:G1", 
       longtable = TRUE)


#### Figure G1: Coefficient Plot ####

# Put model estimates into temporary data.frames:
model6Frame <- data.frame(term = m6$term,
                          estimate = m6$coefficients,
                          std.error = m6$std.error,
                          conf.low = m6$conf.low,
                          conf.high = m6$conf.high,
                          modelName = "Pooled Model") %>%
  by_2sd(vdem_subset_v13_pooled)

# Specify the width of your confidence intervals
interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier

# Combine these data.frames
model6Frame <- model6Frame %>%
  filter(!str_detect(term, "(Intercept)")) %>%
  mutate(term = case_when(term == "indicatorsd_v2cafres" ~ "Dummy Freedom to research and teach", 
                          term == "indicatorsd_v2cainsaut" ~ "Dummy Institutional autonomy", 
                          term == "indicatorsd_v2casurv" ~ "Dummy Campus integrity", 
                          term == "indicatorsd_v2clacfree" ~ "Dummy Freedom of academic and cultural expression", 
                          term == "I(v2xca_academ^2)" ~ "Level of Academic Freedom squared", 
                          term == "v2xca_academ" ~ "Level of Academic Freedom", 
                          term == "year" ~ "Year", 
                          term == "v2x_freexp" ~ "Freedom of Expression",
                          term == "number_coders" ~ "Number of respondents", 
                          term == "number_of_extern_coders" ~ "Percentage External Coders", 
                          term == "confidence_coders_indidator" ~ "Rater's Mean Confidence", 
                          term == "e_regionpol_6C2" ~ "Latin America and the Caribbean", 
                          term == "e_regionpol_6C3" ~ "MENA", 
                          term == "e_regionpol_6C4" ~ "SSA", 
                          term == "e_regionpol_6C5" ~ "Western Europe and North America", 
                          term == "e_regionpol_6C6" ~ "Asia and Pacific")) %>%
  mutate(conf.low = estimate - std.error*interval2, 
         conf.high = estimate + std.error*interval2)



# Plot
ggplot(model6Frame, aes( x = factor(term, levels=c("Latin America and the Caribbean", 
                                                   "MENA",
                                                   "SSA", 
                                                   "Western Europe and North America", 
                                                   "Asia and Pacific",
                                                   "Dummy Freedom to research and teach", 
                                                   "Dummy Institutional autonomy", 
                                                   "Dummy Campus integrity", 
                                                   "Dummy Freedom of academic and cultural expression", 
                                                   "Rater's Mean Confidence", 
                                                   "Percentage External Coders",
                                                   "Number of respondents", 
                                                   "Level of Academic Freedom squared",
                                                   "Level of Academic Freedom", 
                                                   "Freedom of Expression", 
                                                   "Year")))) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(ymin = estimate - std.error*interval1,
                     ymax = estimate + std.error*interval1),
                 lwd = 1, position = position_dodge(width = 2/3)) +
  geom_pointrange(aes(y = estimate, ymin = estimate - std.error*interval2,
                      ymax = estimate + std.error*interval2,),
                  lwd = 1/2, position = position_dodge(width = 2/3),
                  fill = "WHITE") +
  coord_flip() + theme_bw() +
  ggtitle("") +
  labs(color= "Models", 
       shape= "Models",
       x = "", 
       y = "Estimate") + 
  theme(legend.position="bottom") 


## Figure 3 ##

ggsave("outputs_simulated_data/Figure_G1.pdf", width = 20,
       height = 15,
       units = c("cm"), dpi = 1000)
ggsave("outputs_simulated_data/Figure_G1.png", width = 20,
       height = 15,
       units = c("cm"), dpi = 1000)


m6_me <- ggpredict(m6, terms = c("v2xca_academ[all]"))

ggplot(m6_me, aes(x = x, y = predicted)) +
  geom_line() +
  geom_ribbon(aes(ymin= conf.low, ymax = conf.high), alpha = 0.3) +
  theme_bw() +
  ggtitle("") +
  labs(color= "", 
       y= "Predicted Standard Deviation of Respondents",
       x = "Academic Freedom Index") + 
  theme(legend.position="bottom") +
  scale_color_viridis(discrete = TRUE, option = "D")


ggsave("outputs_simulated_data/Figure_G2.pdf", width = 20,
       height = 12,
       units = c("cm"), dpi = 1000)
ggsave("outputs_simulated_data/Figure_G2.png", width = 20,
       height = 12,
       units = c("cm"), dpi = 1000)

#---------------------------------------------------#
#-------------Predicting Respondent Ratings---------#
#---------------------------------------------------#

#load coder-level data 

coder_level_ds <- readRDS("data/coder_level_ds_perceptions.rds")

#load coder characteristics

## prepare coder level data ##

summary(coder_characteristics_wide)

coder_level_ds <- coder_level_ds %>%
  left_join(coder_characteristics_wide, by = ("coder_id")) %>%
  mutate(reside_in_country = ifelse(country_id == reside, 1, 
                                    ifelse(country_id != reside, 0, NA)))


## subset country-year data ##

vdemsubset_v13 <- vdem_full_v13 %>%
  dplyr::select(country_id, year, historical_date, v2x_polyarchy, v2x_liberal, v2x_libdem, e_regionpol_6C,e_gdppc, e_pop, e_cow_exports,e_cow_imports)

coder_level_ds_subset <- coder_level_ds %>%
  dplyr::select(country_text_id, country_id, coder_id, historical_date, starts_with(c("v2cafres", "v2cafexch", "v2cainsaut", 
                                                                                      "v2casurv", "v2clacfree")), gender, phd, government_employment, age_block, 
                main_country_id, reside_in_country,   
                v2zzfremrk, v2zzelcdem, v2zzlibdem, v2zztimespent, v2zzsatisf, v2zzfirstreas) %>%
  mutate(historical_date = ymd(historical_date)) %>%
  left_join(vdemsubset_v13, by = c("country_id", "historical_date")) %>%
  filter(year >= 1900)

coder_level_ds_subset$gender <- as.factor(coder_level_ds_subset$gender)
coder_level_ds_subset$age_block <- as.factor(coder_level_ds_subset$age_block)
coder_level_ds_subset$phd <- as.factor(coder_level_ds_subset$phd)
coder_level_ds_subset$government_employment <- as.factor(coder_level_ds_subset$government_employment)
coder_level_ds_subset$reside_in_country <- as.factor(coder_level_ds_subset$reside_in_country)
coder_level_ds_subset$e_regionpol_6C <- as.factor(coder_level_ds_subset$e_regionpol_6C)
coder_level_ds_subset$reside_in_country <- as.factor(coder_level_ds_subset$reside_in_country)
coder_level_ds_subset$year <- as.factor(coder_level_ds_subset$year)

coder_level_ds_subset$v2cafres_perceptmean

## Freedom to research and teach ##
m1 <- lm_robust(v2cafres_perceptmean ~ gender + age_block + phd + government_employment + reside_in_country +
                  v2zzfremrk + v2zzelcdem + v2zzlibdem + v2zztimespent + v2x_liberal + v2x_polyarchy + v2zzlibdem:v2x_liberal +
                  v2zzelcdem:v2x_polyarchy +
                  e_regionpol_6C + e_gdppc +year,
                clusters = country_id, 
                data = coder_level_ds_subset)
summary(m1)


## Freedom of academic exchange and dissemination ##
m2 <- lm_robust(v2cafexch_perceptmean ~ gender + age_block + phd + government_employment + reside_in_country +
                  v2zzfremrk + v2zzelcdem + v2zzlibdem + v2zztimespent + v2x_liberal + v2x_polyarchy + v2zzlibdem:v2x_liberal +
                  v2zzelcdem:v2x_polyarchy +
                  e_regionpol_6C + e_gdppc +year,
                clusters = country_id, 
                data = coder_level_ds_subset)
summary(m2)

## Institutional autonomy ##
m3 <- lm_robust(v2cainsaut_perceptmean ~ gender + age_block + phd + government_employment + reside_in_country +
                  v2zzfremrk + v2zzelcdem + v2zzlibdem + v2zztimespent + v2x_liberal + v2x_polyarchy + v2zzlibdem:v2x_liberal +
                  v2zzelcdem:v2x_polyarchy +
                  e_regionpol_6C + e_gdppc +year,
                clusters = country_id, 
                data = coder_level_ds_subset)
summary(m3)

## Campus integrity ##
m4 <- lm_robust(v2casurv_perceptmean ~ gender + age_block + phd + government_employment + reside_in_country +
                  v2zzfremrk + v2zzelcdem + v2zzlibdem + v2zztimespent + v2x_liberal + v2x_polyarchy + v2zzlibdem:v2x_liberal +
                  v2zzelcdem:v2x_polyarchy +
                  e_regionpol_6C + e_gdppc +year,
                clusters = country_id, 
                data = coder_level_ds_subset)
summary(m4)

## Freedom of academic and cultural expression ##
m5 <- lm_robust(v2clacfree_perceptmean ~ gender + age_block + phd + government_employment + reside_in_country +
                  v2zzfremrk + v2zzelcdem + v2zzlibdem + v2zztimespent + v2x_liberal + v2x_polyarchy + v2zzlibdem:v2x_liberal +
                  v2zzelcdem:v2x_polyarchy +
                  e_regionpol_6C + e_gdppc +year,
                clusters = country_id, 
                data = coder_level_ds_subset)
summary(m5)

# No of observations #

m1$nobs
m2$nobs
m3$nobs
m4$nobs
m5$nobs

## Pooled Analysis with all indicators ##

coder_level_ds_subset_pooled <- coder_level_ds_subset %>%
  dplyr::select(country_text_id, country_id, coder_id, historical_date, year, 
                v2cafres_perceptmean, v2cafexch_perceptmean, v2cainsaut_perceptmean, v2casurv_perceptmean, v2clacfree_perceptmean,
                v2zzfremrk, v2zzelcdem, v2zzlibdem, v2zztimespent, v2zzsatisf, v2zzfirstreas,  v2x_polyarchy, v2x_liberal, v2x_libdem,
                e_regionpol_6C,e_gdppc, e_pop, e_cow_exports,e_cow_imports, gender, age_block, phd, government_employment,reside_in_country) %>%
  pivot_longer(!c(country_text_id, country_id, coder_id, historical_date, year,v2zzfremrk, v2zzelcdem, v2zzlibdem, v2zztimespent, 
                  v2zzsatisf, v2zzfirstreas,  v2x_polyarchy, v2x_liberal, v2x_libdem, e_regionpol_6C,e_gdppc, e_pop,
                  e_cow_exports,e_cow_imports, gender, age_block, phd, government_employment,reside_in_country), names_to = "indicator", values_to = "coder_value_indicator") %>%
  drop_na(coder_value_indicator)

## pooled regression analysis ##
m6 <- lm_robust(coder_value_indicator ~gender + age_block + phd + government_employment + reside_in_country +
                  v2zzfremrk + v2zzelcdem + v2zzlibdem + v2zztimespent + v2x_liberal + v2x_polyarchy + v2zzlibdem:v2x_liberal +
                  v2zzelcdem:v2x_polyarchy +
                  e_regionpol_6C + e_gdppc + year + indicator,
                clusters = country_id, 
                data = coder_level_ds_subset_pooled)
summary(m6)
m6$nobs

## Table G2 Supplementary Appendix ##
texreg(list(m1, m2, m3, m4, m5, m6), 
       head.tag = TRUE, body.tag = TRUE,
       file = "outputs_simulated_data/Table_G2.tex",
       digits = 3,
       omit.coef=c('year'), 
       custom.coef.names = c("Intercept",
                             "Respondent's Gender", 
                             "Respondent's Age: Middle Cohort", 
                             "Respondent's Age: Oldest Cohort", 
                             "Respondent: PhD education", 
                             "Respondent: Government employed", 
                             "Respondent: Reside in country", 
                             "Respondent supports freemarket", 
                             "Respondent supports electoral democracy", 
                             "Respondent supports liberal democracy", 
                             "time spent for coding", 
                             "Country liberal component", 
                             "Country electoral democracy level", 
                             "Latin America and the Caribbean", 
                             "MENA",
                             "SSA", 
                             "Western Europe and North America",
                             "Asia and Pacific",
                             "GDP pc", 
                             "Respondent supports liberal democracy * LibDem", 
                             "Respondent supports electoral democracy * EDI", 
                             "Dummy Freedom to research and teach", 
                             "Dummy Institutional Autonomy", 
                             "Dummy Campus Integrity", 
                             "Dummy Freedom of academic and cultural expression"),
       booktabs = TRUE,
       use.packages = FALSE,
       caption = "Linear Models predicting respondents rating with country characteristics",
       fontsize = "scriptsize",
       stars = c(0.001, 0.01, 0.05), 
       label = "Table:G2", 
       longtable = TRUE)

#### Figure G3: Coefficient Plot Pooled Model ####

# Put model estimates into temporary data.frames:
model6Frame <- data.frame(term = m6$term,
                          estimate = m6$coefficients,
                          std.error = m6$std.error,
                          conf.low = m6$conf.low,
                          cond.high = m6$conf.high,
                          modelName = "Pooled Model") %>%
  by_2sd(coder_level_ds_subset_pooled)


# Combine these data.frames
model6Frame <- model6Frame %>%
  filter(!str_detect(term, "(year)")) %>%
  filter(!str_detect(term, "(Intercept)")) %>%
  filter(!str_detect(term, "indicator")) %>%
  filter(!str_detect(term, "e_regionpol")) %>%
  mutate(term = case_when(term == "(Intercept)" ~ "Intercept", 
                          term == "gender1" ~ "Respondent's Gender", 
                          term == "age_block2" ~ "Respondent's Age: Middle Cohort", 
                          term == "age_block3"~  "Respondent's Age: Oldest Cohort",
                          term == "phd1" ~ "Respondent's Education: PhD", 
                          term == "government_employment1" ~ "Respondent: government employed", 
                          term == "reside_in_country1" ~ "Respondent: Reside in country", 
                          term == "v2zzfremrk" ~ "Respondent support free markets",
                          term == "v2zzelcdem" ~ "Respondent supports electoral democracy", 
                          term == "v2zzlibdem" ~ "Respondent supports liberal component", 
                          term == "v2zztimespent" ~ "Time spent for coding", 
                          term == "v2x_liberal" ~ "Liberal Component", 
                          term == "v2x_polyarchy" ~ "Electoral Democracy", 
                          term == "e_gdppc" ~ "GDP per capita", 
                          term == "v2zzlibdem:v2x_liberal" ~ "Respondent supports liberal component * Liberal Component", 
                          term == "v2zzelcdem:v2x_polyarchy" ~"Respondent supports electoral democracy * Electoral Democracy"))


# Specify the width of your confidence intervals
interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier

# Plot
ggplot(model6Frame, aes( x = factor(term, levels=c("GDP per capita", 
                                                   "Time spent for coding", 
                                                   "Respondent support free markets", 
                                                   "Respondent supports electoral democracy", 
                                                   "Respondent supports liberal component", 
                                                   "Electoral Democracy",
                                                   "Liberal Component", 
                                                   "Respondent supports electoral democracy * Electoral Democracy", 
                                                   "Respondent supports liberal component * Liberal Component", 
                                                   "Respondent's Age: Oldest Cohort", 
                                                   "Respondent's Age: Middle Cohort",
                                                   "Respondent's Gender", 
                                                   "Respondent's Education: PhD", 
                                                   "Respondent: government employed", 
                                                   "Respondent: Reside in country")))) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(ymin = estimate - std.error*interval1,
                     ymax = estimate + std.error*interval1),
                 lwd = 1, position = position_dodge(width = 2/3)) +
  geom_pointrange(aes(y = estimate, ymin = estimate - std.error*interval2,
                      ymax = estimate + std.error*interval2,),
                  lwd = 1/2, position = position_dodge(width = 2/3),
                  fill = "WHITE") +
  coord_flip() + theme_bw() +
  ggtitle("") +
  labs(color= "Models", 
       shape= "Models",
       x = "", 
       y = "Estimate") + 
  theme(legend.position="bottom") 

## Figure 3 ##

ggsave("outputs_simulated_data/Figure_G3.pdf", width = 20,
       height = 15,
       units = c("cm"), dpi = 1000)
ggsave("outputs_simulated_data/Figure_G3.png", width = 20,
       height = 15,
       units = c("cm"), dpi = 1000)

##Margins 

F4a <- plot_predictions(m6, condition = list("v2x_liberal", v2zzlibdem = "minmax"))  +
  theme_bw() +
  ggtitle("") +
  labs(color= "", fill = "",
       y= "Predicted Respondent Ratings",
       x = "Liberal Component") + 
  theme(legend.position="bottom") +
  scale_color_grey(start = 0.05,
                   end = 0.3) +
  scale_fill_grey(start = 0.05,
                  end = 0.3)

F4b <- plot_predictions(m6, condition = list("v2x_polyarchy", v2zzelcdem = "minmax"))  +
  theme_bw() +
  ggtitle("") +
  labs(color= "", fill = "",
       y= "Predicted Respondent Ratings",
       x = "Electoral Democracy Index") + 
  theme(legend.position="bottom") +
  scale_color_grey(start = 0.05,
                   end = 0.3) +
  scale_fill_grey(start = 0.05,
                  end = 0.3)

ggarrange(F4a, F4b)

ggsave("outputs_simulated_data/Figure_G4.pdf", width = 20,
       height = 12,
       units = c("cm"), dpi = 1000)
ggsave("outputs_simulated_data/Figure_G4.png", width = 20,
       height = 12,
       units = c("cm"), dpi = 1000)


#### Testing Interaction Effect between Female/Male Respondent and Reside or born in a country ####

## pooled regression analysis ##
m7 <- lm_robust(coder_value_indicator ~ gender + age_block + phd + government_employment + reside_in_country +
                  v2zzfremrk + v2zzelcdem + v2zzlibdem + v2zztimespent + v2x_liberal + v2x_polyarchy + v2zzlibdem:v2x_liberal +
                  v2zzelcdem:v2x_polyarchy + gender:reside_in_country +
                  e_regionpol_6C + e_gdppc + year + indicator,
                clusters = country_id, 
                data = coder_level_ds_subset_pooled)
summary(m7)
m7$nobs


###################################################################
#### Marginal Effects for Model 6 and Model 7 conditional on gender 
###################################################################

m6_ame <- avg_comparisons(m6,  variables = list(gender = c("0", "1")))
m6_ame <- m6_ame %>%
  mutate(contrast = "Female - Male")

f2a <- ggplot(m6_ame, aes(x = contrast, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_point() +
  geom_linerange() +
  geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
  theme_bw() +
  ggtitle("") +
  labs(color= "", fill = "",
       y= "Average Marginal Effect",
       x = "Contrast") + 
  theme(legend.position="bottom") +
  scale_color_grey() +
  scale_fill_grey()

f2b <- plot_predictions(m6, condition = c("gender")) +
  theme_bw() +
  ggtitle("") +
  labs(color= "", fill = "",
       y= "Predicted Values",
       x = "Gender") + 
  theme(legend.position="bottom") +
  scale_color_grey() +
  scale_fill_grey() +
  scale_x_discrete(breaks=c("0","1"),
                   labels=c("Male", "Female"))

m7_ame <- avg_comparisons(m7,  variables = list(gender = c("0", "1")), 
                          by = "reside_in_country")

f2c <- ggplot(m7_ame, aes(x = reside_in_country, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_point() +
  geom_linerange() +
  geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
  theme_bw() +
  ggtitle("") +
  labs(color= "", fill = "",
       y= "Average Marginal Effect of Gender",
       x = "Contrast") + 
  theme(legend.position="bottom") +
  scale_color_grey() +
  scale_fill_grey() +
  scale_x_discrete(breaks=c("0","1"),
                   labels=c("Not reside in country", "Reside in country"))

f2d <- plot_predictions(m6, condition = c("gender", "reside_in_country"))+
  theme_bw() +
  ggtitle("") +
  labs(color= "", fill = "",
       y= "Predicted Values",
       x = "Gender") + 
  theme(legend.position="bottom") +
  scale_color_grey(labels=c("0" = "Not reside in country", "1"="Reside in country")) +
  scale_fill_grey() +
  scale_x_discrete(breaks=c("0","1"),
                   labels=c("Male", "Female")) 

ggarrange(f2a, f2b, f2c, f2d, labels = c("A", "B", "C", "D"))

ggsave("outputs_simulated_data/Figure_G5.pdf", width = 15,
       height = 15,
       units = c("cm"), dpi = 1000)
ggsave("outputs_simulated_data/Figure_G5.png", width = 15,
       height = 15,
       units = c("cm"), dpi = 1000)

